package com.shujia.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demo of the RDD `sample` transformation.
 *
 * Reads a text file into an RDD of lines, draws a random sample of roughly 1%
 * of those lines without replacement, and prints every sampled line.
 */
object Demo7Sample {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (not used by this demo)
   */
  def main(args: Array[String]): Unit = {
    // Build the Spark context: application name "Demo7Sample", master "local".
    val conf: SparkConf = new SparkConf()
      .setAppName("Demo7Sample")
      .setMaster("local")
    val sc: SparkContext = new SparkContext(conf)

    // Run the job inside try/finally so the context is always stopped,
    // even when the job fails (e.g. the input path does not exist).
    try {
      // 1. Read the students data; each element of the RDD is one line of the file.
      val linesRDD: RDD[String] = sc.textFile("Spark/data/stu/students.txt")

      /*
       * sample is a transformation operator.
       *   withReplacement: whether sampling is done with replacement
       *   fraction:        sampling ratio; the size of the result is only
       *                    approximately (not exactly) this share of the input
       * No seed is passed, so the sampled lines may differ between runs.
       */
      val sampleRDD: RDD[String] =
        linesRDD.sample(withReplacement = false, fraction = 0.01)

      // foreach is the action that actually triggers reading and sampling.
      sampleRDD.foreach(println)
    } finally {
      // Release the resources held by the SparkContext.
      sc.stop()
    }
  }

}
